#############################################################################
# Fixed effect regression
#
# - Baseline data
#
# 1. Homogeneous slopes with year FE
# 2. Homogeneous slopes with state-year FE
# 3. Heterogeneous slopes with state-year FE
# 4. FE regressions in short panel
#
#############################################################################

rm(list=ls())
graphics.off()

library(data.table)
library(ggplot2)
library(haven)
library(fixest)
library("RColorBrewer")
library(xtable)
library(Hmisc)

save_dta = "Data_new"
save_res = "Results/"
memory.limit(size=200000)
time0=Sys.time()

#############################################################################
# 1. Homogeneous slopes with year FE
#############################################################################

rm(list=ls())
load("Data_new/Data_baseline.RData")

# Estimate (clustered errors by default)
t0=Sys.time()
f1 = feols(yield ~  precr + ddr | factor(fips) + factor(year) ,data=dt)
t1=Sys.time()
ft1 = t1-t0
ft1
summary(f1)

# Check number of coefficients: 2 weather
nobs(f1)
dim(f1$coeftable)
summary(f1$coeftable)

# Temperature per county-year 
dt_plot1 = dt[,.SD,.SDcols=c("state","fips","year","corn_area","ddr","nobs")]

# Add estimated slope to the data and marginal effects
dt_plot1[,coef:=f1$coeftable["ddr",1]]
dt_plot1[,se:=f1$coeftable["ddr",2]]
dt_plot1[,mg1:=coef]
dim(dt_plot1)
summary(dt_plot1)

save.image("Data_new/FE_results.RData")

#############################################################################
# 2. Homogeneous slopes with state-year FE
#############################################################################

# Estimate (clustered errors by default)
t0=Sys.time()
f2 = feols(yield ~  precr + ddr | factor(fips) + factor(year)^factor(state),data=dt)
t1=Sys.time()
ft2 = t1-t0
ft2
summary(f2)

# Check number of coefficients: 2 weather
nobs(f2)
dim(f2$coeftable)
summary(f2$coeftable)

# Temperature per county-year
dt_plot2 = dt[,.SD,.SDcols=c("state","fips","year","corn_area","ddr","nobs")]

# Add estimated slope to the data and marginal effects
dt_plot2[,coef:=f2$coeftable["ddr",1]]
dt_plot2[,se:=f2$coeftable["ddr",2]]
dt_plot2[,mg2:=coef]
dim(dt_plot2)
summary(dt_plot2)

save.image("Data_new/FE_results.RData")

#############################################################################
# 3. Heterogeneous slopes with state-year FE
#############################################################################

# Estimate
t0=Sys.time()
f3 = feols(yield ~  precr + ddr:factor(fips) | factor(fips) + factor(year)^factor(state),data=dt)
t1=Sys.time()
ft3 = t1-t0
ft3
summary(f3)

# Check number of coefficients: 2253 slopes + 1 precr 
nobs(f3)
dim(f3$coeftable)
summary(f3$coeftable)

# Temperature per county-year
dt_plot3 = dt[,.SD,.SDcols=c("state","fips","year","corn_area","ddr","nobs")]

# Add estimated slope to the data and marginal effects
source("Code/0. CODE Auxiliary.R")
dslop = slopes_dt(f=f3,names_var="")
dim(dt_plot3)
dt_plot3 = merge(dt_plot3,dslop,by="fips",all=TRUE)
dim(dt_plot3)
dt_plot3[,mg3:=coef]
summary(dt_plot3)

# Put 3 versions together
names(dt_plot1)
names(dt_plot2)
names(dt_plot3)
summary(dt_plot1$ddr)
summary(dt_plot2$ddr)
summary(dt_plot3$ddr)
dt_res = merge(dt_plot1[,.SD,.SDcols=c("state","fips","year","corn_area","ddr","nobs","mg1")],
               dt_plot2[,.SD,.SDcols=c("fips","year","mg2")],by=c("fips","year"),all=TRUE)
dt_res = merge(dt_res,dt_plot3[,.SD,.SDcols=c("fips","year","mg3")],by=c("fips","year"),all=TRUE)
dim(dt_res)
summary(dt_res)

rm(dt_plot1,dt_plot2,dt_plot3,t0,t1,dslop,ft1,ft2,ft3)
save.image("Data_new/FE_results.RData")

#############################################################################
# 4. FE regressions in short panel
#############################################################################

rm(list=ls())
load("Data_new/FE_results.RData")
summary(dt$year)
length(unique(dt$year))

# Count number observations 1990 or after
dt[, aux:=(year>=1990)]
dt[,nobs_short := sum(aux),by="fips"]
summary(dt$nobs_short)
summary(dt[year>=1990,nobs_short])
dim(dt_res)
dt_res = merge(dt_res,dt[,.(fips,year,nobs_short)])
dim(dt_res)

# Keep counties with at least 10 years in this shorter panel
dts = dt[nobs_short>=10 & year>=1990]
nrow(dts)
summary(dts$nobs_short)
summary(dts$year)
length(unique(dts$fips))
# 1,779 counties, and 27,661 observations, from 1990 to 2005. Counties with at least 10 observations in this range

# Short panel
fs = feols(yield ~  precr + ddr:factor(fips) | factor(fips) + factor(year)^factor(state),data=dts)
fs$collin.var
summary(fs)

# Add estimated slope to the data and marginal effects (for every year)
source("Code/0. CODE Auxiliary.R")
dslop = slopes_dt(f=fs,names_var="")
dim(dt_res)
dt_res = merge(dt_res,dslop,by="fips",all=TRUE)
dim(dt_res)
dt_res[,mgs:=coef]
dt_res[,c("coef","se"):=NULL]
summary(dt_res)

time1=Sys.time()
save.image("Data_new/FE_results.RData")

